import os
import random

# Directory that is scanned; paths in the output list are relative to it.
ROOT_PATH = r'D:\dataset\voxceleb2_wav\vox2_aac\dev\aac'
# File the "<speaker id> <relative path>" lines are written to.
# (Alternative target kept for reference: '../lists/train_list.txt'.)
OUT_FILENAME = '../lists/vox2_train_list.txt'
# Sampling threshold: a file is skipped when random.randint(1, 100)
# exceeds this value, so 100 keeps every file.
PERCENT = 100

# Cap that makes list_file stop scanning early (named after the number of
# speakers to include).
nspeaker = 6000


def list_file(parent_path, level, id, fs, *, root=None, percent=None,
              max_speakers=None):
    """Recursively write ``"<speaker id> <relative path>"`` lines to *fs*.

    The tree below the root is walked as three levels:

    * level 0 - each sub-directory name is taken as a speaker id;
    * level 1 - each sub-directory of a speaker is descended into;
    * any other level - each regular file yields one output line.

    Args:
        parent_path: Directory to scan at this recursion step.
        level: Depth of ``parent_path`` below the root (0 for the root).
        id: Speaker id inherited from level 0 (ignored at level 0 itself).
            The name shadows the builtin but is kept for compatibility.
        fs: Writable text stream receiving the output lines.
        root: Directory the written paths are made relative to. Defaults to
            ``parent_path`` when ``level == 0`` and to ``ROOT_PATH`` otherwise.
        percent: A file is skipped when ``random.randint(1, 100)`` exceeds
            this value. Defaults to the module constant ``PERCENT``.
        max_speakers: Maximum number of speaker directories processed at
            level 0. Defaults to the module constant ``nspeaker``.

    Returns:
        None. Prints ``num <count>`` for every directory scanned, where the
        count is the number of sub-directories descended into (levels 0/1)
        or of lines written (deeper levels).
    """
    if root is None:
        # At the top level the directory being scanned *is* the root.
        root = parent_path if level == 0 else ROOT_PATH
    if percent is None:
        percent = PERCENT
    if max_speakers is None:
        max_speakers = nspeaker

    num = 0
    # os.listdir() order is arbitrary; sorting makes both the selection of
    # speakers under the cap and the output order reproducible.
    for sub_file in sorted(os.listdir(parent_path)):
        sub_path = os.path.join(parent_path, sub_file)
        if level in (0, 1):
            if not os.path.isdir(sub_path):
                # Stray files must not consume the speaker quota.
                continue
            if level == 0:
                # The cap limits speakers only, never sessions or files.
                if num >= max_speakers:
                    break
                id = sub_file
            list_file(sub_path, level + 1, id, fs, root=root,
                      percent=percent, max_speakers=max_speakers)
            num += 1
        else:
            if not os.path.isfile(sub_path):
                continue
            if random.randint(1, 100) > percent:
                continue
            # relpath is robust to a trailing separator on the root, unlike
            # slicing the string by len(root) + 1.
            path = os.path.relpath(sub_path, root)
            fs.write('{} {}\n'.format(id, path))
            num += 1
    print("num", num)


if __name__ == '__main__':
    # Script entry point: scan ROOT_PATH and write the list to OUT_FILENAME.
    # The context manager closes (and flushes) the output file even if the
    # directory walk raises part-way through.
    with open(OUT_FILENAME, 'w') as fs:
        list_file(ROOT_PATH, 0, '', fs)
